# Replication code for Taylor C. Boas and Amy Erica Smith, “Looks Like Me, Thinks Like Me: Descriptive Representation and Opinion Congruence in Brazil.” Latin American Research Review 54, 2 (2019).

# Analysis conducted in R 3.6.0 on MacOS 10.13.6

# NOTE: This file replicates Appendix Tables 2, 4, and 5. We recommend running R replication files in the following order; please see readme.txt for details.
# 	1_merge_lapop.R
# 	2_merge_latinobarometro.R
# 	3_recode_reshape.R
# 	4_difference_in_distributions.R
# 	5_regressions.R
# 	6_civil_society_meeting.R
# 	7_mass_descriptives.R
# 	8_elite_descriptives.R
# 	9_mean_differences.R
#	10_difference_in_distributions_ks.R
#	11_elite_sample_simulation.R
#	12_converts_vs_lifelong.R

# Set working directory as appropriate
# setwd('~/Dropbox/brazil_leg_surveys/replication/')

# Start from an empty workspace (hidden objects included), then attach Hmisc,
# which supplies the wtd.mean() and latex() calls used below. Please make sure
# Hmisc is installed before running.

rm(list = ls(all.names = TRUE))

library(Hmisc)

# The remainder of the script expects this file to provide the `mass.data`
# data frame.
load('mass.data.RData')

# ================================================================
# Appendix Table 2: How Public Opinion Differs for Minority Groups
# ================================================================

# Issue-opinion outcome columns, in the order they appear across the table.
issue.vars <- c(
  'mass_econ', 'mass_ideol', 'mass_gaymarriage', 'mass_affirm',
  'mass_classaffirm', 'mass_environment', 'mass_abortion'
)

# Fit one weighted bivariate regression per issue column on `group`.
# Returns a list of lm objects named after the issue columns.
fit.issue.models <- function(group) {
  apply(mass.data[, issue.vars], 2, function(y) {
    lm(y ~ group, weights = mass.data$mass_pweight)
  })
}

# Pull one cell from the second row (the group term) of a fitted model's
# summary coefficient matrix: column 2 is the standard error, column 4 the
# p-value.
slope.stat <- function(fit, column) {
  coef(summary(fit))[2, column]
}

# Two table rows for one group: rounded slope estimates on top, rounded
# standard errors wrapped in parentheses underneath.
coef.rows <- function(fits) {
  est <- round(sapply(fits, function(fit) coef(fit)[2]), 3)
  se <- paste0('(', round(sapply(fits, slope.stat, column = 2), 3), ')')
  rbind(est, se)
}

# Matching pair of rows flagging p < 0.05; the flag is duplicated so the
# estimate and its standard error are bolded together.
sig.rows <- function(fits) {
  sig <- sapply(fits, slope.stat, column = 4) < 0.05
  rbind(sig, sig)
}

evang.reg <- fit.issue.models(mass.data$mass_evang)
loweduc.reg <- fit.issue.models(mass.data$mass_loweduc)
black.reg <- fit.issue.models(mass.data$mass_black)
female.reg <- fit.issue.models(mass.data$mass_female)

# Stack the groups in the row order of the printed table.
group.fits <- list(evang.reg, loweduc.reg, black.reg, female.reg)
reg.table <- do.call(rbind, lapply(group.fits, coef.rows))
sig.table <- do.call(rbind, lapply(group.fits, sig.rows))

colnames(reg.table) <- c(
  'Economic\nRegime', 'Ideology', 'Gay\nMarriage', 'Race\nQuotas',
  'Class\nQuotas', 'Environ-\nment', 'Abortion'
)
rownames(reg.table) <- c(
  'Evangelical', '', 'No College', '', 'Afro-Brazilian', '', 'Female', ''
)

# Wrap every significant cell in \textbf{}; assigning through `[]` keeps the
# dimnames of reg.table.
reg.table[] <- ifelse(
  sig.table,
  paste0('\\textbf{', reg.table, '}'),
  reg.table
)

bottom.note <- 'Entries are coefficients from a simple linear regression of issue opinion (scaled 0--1) on an indicator for each group. Estimated standard errors are in parentheses. Bolded entries are statistically significant at $p < 0.05$. Population weights are applied for BEPS 2010 and AB 2012.'

# Write the table to reg_table.tex.
reg.table.latex <- latex(
  reg.table,
  file = 'reg_table.tex',
  collabel.just = rep('c', ncol(reg.table)),
  col.just = rep('c', ncol(reg.table)),
  rowlabel = '\nGroup',
  caption = 'How Public Opinion Differs for Minority Groups',
  booktabs = FALSE,
  ctable = TRUE,
  where = "htp",
  insert.bottom = bottom.note,
  na.blank = TRUE,
  extracolsize = 'normalsize'
)

# ======================================================
# Appendix Table 5: Overlap Among Demographic Categories
# ======================================================

# Group indicators, in the column order of the table.
demog.vars <- c('mass_female', 'mass_black', 'mass_evang', 'mass_loweduc')

# Weighted share of each group indicator among the respondents selected by
# `keep`; the population weights are subset with the same selector.
# NOTE(review): row selection by an indicator column assumes those columns
# are logical -- confirm against the recode script.
weighted.shares <- function(keep) {
  apply(
    mass.data[keep, demog.vars], 2, wtd.mean,
    na.rm = TRUE, weights = mass.data$mass_pweight[keep]
  )
}

# Row 1 is the full sample; rows 2-5 condition on each group in turn, using
# the same indicators in the same order as the columns.
demog.matrix <- round(100 * rbind(
  weighted.shares(rep(TRUE, nrow(mass.data))),
  weighted.shares(mass.data$mass_female),
  weighted.shares(mass.data$mass_black),
  weighted.shares(mass.data$mass_evang),
  weighted.shares(mass.data$mass_loweduc)
), 1)

# Blank the cells giving a group's share within itself (row k + 1, column k),
# which are trivially 100. They are masked by position rather than by testing
# for the value 100, so any other cell that legitimately rounds to 100.0 is
# kept in the table.
demog.matrix[cbind(seq_along(demog.vars) + 1, seq_along(demog.vars))] <- NA

rownames(demog.matrix) <- c('All', 'Female', 'Afro-Brazilian', 'Evangelical', 'No College')
colnames(demog.matrix) <- c('Female', 'Afro-Brazilian', 'Evangelical', 'No College')

bottom.note <- "NOTE: Data are from the pooled mass surveys (years 2002--2013) used in the main analysis. Population weights are applied for BEPS 2010 and AB 2012."

# Write the table to demog_matrix.tex; NA cells are left blank (na.blank).
demog.matrix.latex <- latex(
  demog.matrix,
  file = 'demog_matrix.tex',
  collabel.just = rep('c', 4),
  col.just = rep('c', 4),
  rowlabel = '',
  caption = 'Overlap Among Demographic Categories',
  cgroup = 'Percent who are:',
  n.cgroup = 4,
  rgroup = 'Respondents',
  n.rgroup = 5,
  booktabs = FALSE,
  ctable = TRUE,
  where = "htp",
  insert.bottom = bottom.note,
  na.blank = TRUE
)

# =========================================================
# Appendix Table 4: Descriptive Statistics for Mass Surveys
# =========================================================

# Survey identifier: mass_id with everything from the first hyphen onward
# removed (the resulting levels are the column keys used further down).
mass.data$mass_survey <- factor(gsub('\\-.*', '', mass.data$mass_id))

# Region indicators derived from the state code in mass_uf.
# NOTE(review): %in% yields FALSE for a missing mass_uf, so such respondents
# count toward no region -- confirm this is intended.
region.states <- list(
  south = c('PR', 'RS', 'SC'),
  southeast = c('SP', 'RJ', 'ES', 'MG'),
  north = c('AM', 'RR', 'AP', 'PA', 'TO', 'RO', 'AC'),
  northeast = c('MA', 'PI', 'CE', 'RN', 'PE', 'PB', 'SE', 'AL', 'BA'),
  centerwest = c('MT', 'MS', 'GO', 'DF')
)
for (region in names(region.states)) {
  mass.data[[paste0('mass_', region)]] <- mass.data$mass_uf %in% region.states[[region]]
}

# Weighted mean of one mass.data column within each survey, weighting by
# mass_pweight and dropping missing values.
survey.mean <- function(var) {
  by(
    mass.data[, c(var, 'mass_pweight')],
    mass.data$mass_survey,
    function(d) wtd.mean(d[, 1], weights = d[, 2], na.rm = TRUE)
  )
}

# Age and log municipal population are reported as means; every indicator is
# scaled to a percentage.
age <- survey.mean('mass_age')
evang <- 100 * survey.mean('mass_evang')
loweduc <- 100 * survey.mean('mass_loweduc')
black <- 100 * survey.mean('mass_black')
female <- 100 * survey.mean('mass_female')

log.muni.pop <- survey.mean('mass_log.muni.pop')

south <- 100 * survey.mean('mass_south')
southeast <- 100 * survey.mean('mass_southeast')
north <- 100 * survey.mean('mass_north')
northeast <- 100 * survey.mean('mass_northeast')
centerwest <- 100 * survey.mean('mass_centerwest')

mass.descr.table <- round(
  rbind(
    age, evang, loweduc, black, female,
    log.muni.pop,
    south, southeast, north, northeast, centerwest
  ),
  1
)

# Put the survey columns in the order of the printed table, then swap the
# keys for display labels.
survey.order <- c(
  paste0('lb0', 2:7), 'lapop07', 'lb08', 'lapop08', 'lb09', 'lb10',
  'beps', 'lb11', 'lapop12', 'lb13'
)
mass.descr.table <- mass.descr.table[, survey.order]
colnames(mass.descr.table) <- c(
  'LB\n2002', 'LB\n2003', 'LB\n2004', 'LB\n2005', 'LB\n2006', 'LB\n2007',
  'AB\n2007', 'LB\n2008', 'AB\n2008', 'LB\n2009', 'LB\n2010', 'BEPS\n2010',
  'LB\n2011', 'AB\n2012', 'LB\n2013'
)
rownames(mass.descr.table) <- c(
  'Age', 'Evangelical', 'No College', 'Afro-Brazilian', 'Female',
  'Log Population',
  'South', 'Southeast', 'North', 'Northeast', 'Center-West'
)

bottom.note <- "NOTE: Entries are percentages, except for Age and Log Population, which are means. LB = Latinobar\\'ometro, AB = AmericasBarometer, BEPS = Brazilian Electoral Panel Study. Population weights are applied for BEPS 2010 and AB 2012."

# Write the landscape table to mass_descr_table.tex; the row groups cover the
# 5 demographic rows, 1 municipality row and 5 region rows.
mass.descr.table.latex <- latex(
  mass.descr.table,
  file = 'mass_descr_table.tex',
  collabel.just = rep('c', ncol(mass.descr.table)),
  col.just = rep('c', ncol(mass.descr.table)),
  rowlabel = '',
  caption = 'Descriptive Statistics for Mass Surveys',
  rgroup = c('Demographics', 'Municipality', 'Region'),
  n.rgroup = c(5, 1, 5),
  booktabs = FALSE,
  ctable = TRUE,
  where = "htp",
  insert.bottom = bottom.note,
  na.blank = TRUE,
  landscape = TRUE,
  extracolsize = 'normalsize'
)
